Make the prediction grid

Author

Max Lindmark

Published

March 19, 2025

Intro

Make an evenly spaced UTM prediction grid with all spatially varying covariates for the diet and the biomass data

library(tidyverse)
library(tidync)
library(tidyterra)
library(tidylog)
library(sp)
library(raster)
Warning: package 'raster' was built under R version 4.3.3
library(devtools)
library(RCurl)
library(sdmTMB)
library(terra)
Warning: package 'terra' was built under R version 4.3.3
library(ncdf4)
library(chron)
library(viridis)
library(readxl)

library(ggsidekick)
theme_set(theme_sleek()) # devtools::install_github("seananderson/ggsidekick") # not on CRAN

# Source code for map plots
devtools::source_url("https://raw.githubusercontent.com/maxlindmark/pred-prey-overlap/main/R/functions/map-plot.R")
Warning: package 'sf' was built under R version 4.3.3
Reading layer `StatRec_map_Areas_Full_20170124' from data source 
  `/Users/maxlindmark/Dropbox/Max work/R/pred-prey-overlap/data/shapefiles/ICES-StatRec-mapto-ICES-Areas/StatRec_map_Areas_Full_20170124.shp' 
  using driver `ESRI Shapefile'
Simple feature collection with 11074 features and 17 fields
Geometry type: POLYGON
Dimension:     XY
Bounding box:  xmin: -44 ymin: 36 xmax: 69 ymax: 85
Geodetic CRS:  WGS 84
Reading layer `ICES_Areas_20160601_cut_dense_3857' from data source 
  `/Users/maxlindmark/Dropbox/Max work/R/pred-prey-overlap/data/shapefiles/ICES_areas/ICES_Areas_20160601_cut_dense_3857.shp' 
  using driver `ESRI Shapefile'
Simple feature collection with 66 features and 10 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -4898058 ymin: 4300621 xmax: 7625385 ymax: 30240970
Projected CRS: WGS 84 / Pseudo-Mercator
# Set path
home <- here::here()

Read data and depth-raster

# Read data
d <- read_csv(paste0(home, "/data/clean/catch_clean.csv"))

Make the grid with depth

First make a grid for the biomass data, then subset that based on the extend of the stomach data

x <- d$X
y <- d$Y
z <- chull(x, y)

coords <- cbind(x[z], y[z])

coords <- rbind(coords, coords[1, ])

plot(coords[, 1] ~ coords[, 2]) # plot data

sp_poly <- sp::SpatialPolygons(
  list(sp::Polygons(list(sp::Polygon(coords)), ID = 1))
)

sp_poly_df <- sp::SpatialPolygonsDataFrame(sp_poly,
  data = data.frame(ID = 1)
)
cell_width <- 3

pred_grid <- expand.grid(
  X = seq(min(d$X), max(d$X), cell_width),
  Y = seq(min(d$Y), max(d$Y), cell_width),
  year = c(1993:2023)
)

ggplot(pred_grid |> filter(year == 2019), aes(X, Y)) +
  geom_point(size = 0.1) +
  theme_void() +
  coord_sf()

sp::coordinates(pred_grid) <- c("X", "Y")

inside <- !is.na(sp::over(pred_grid, as(sp_poly_df, "SpatialPolygons")))

pred_grid <- pred_grid[inside, ]

pred_grid <- as.data.frame(pred_grid)

ggplot(data = filter(pred_grid, year == 1999), aes(X * 1000, Y * 1000)) +
  geom_point(size = 0.001, alpha = 0.5) +
  NULL

plot_map +
  geom_point(data = filter(pred_grid, year == 1999), aes(X * 1000, Y * 1000), size = 0.001, alpha = 0.5) +
  NULL
Warning: `guide_colourbar()` needs continuous scales.
Warning: Removed 112 rows containing missing values or values outside the scale range
(`geom_point()`).

# Add lat and lon
# Need to go from UTM to lat long for this one...
# https://stackoverflow.com/questions/30018098/how-to-convert-utm-coordinates-to-lat-and-long-in-r
xy <- as.matrix(pred_grid |> dplyr::select(X, Y) |> mutate(X = X * 1000, Y = Y * 1000))
v <- vect(xy, crs = "+proj=utm +zone=33 +datum=WGS84  +units=m")
y <- project(v, "+proj=longlat +datum=WGS84")
lonlat <- geom(y)[, c("x", "y")]

pred_grid$lon <- lonlat[, 1]
pred_grid$lat <- lonlat[, 2]

ggplot(filter(pred_grid, year == 1999), aes(lon, lat)) +
  geom_point()

# Add depth now to remove islands and remaining land
# https://gis.stackexchange.com/questions/411261/read-multiple-layers-raster-from-ncdf-file-using-terra-package
# https://emodnet.ec.europa.eu/geoviewer/
dep_raster <- terra::rast(paste0(home, "/data/covariates/depth/Mean depth natural colour (with land).nc"))
class(dep_raster)
[1] "SpatRaster"
attr(,"package")
[1] "terra"
crs(dep_raster, proj = TRUE)
[1] "+proj=longlat +datum=WGS84 +no_defs"
plot(dep_raster)

pred_grid$depth <- terra::extract(dep_raster, pred_grid |> dplyr::select(lon, lat))$elevation

ggplot(pred_grid, aes(lon, lat, color = depth * -1)) +
  geom_point()

pred_grid$depth <- pred_grid$depth * -1

pred_grid <- pred_grid |> drop_na(depth)

Add month_year variable for matching with raster layers

dat <- pred_grid |> #replicate_df(pred_grid, "quarter", c(1, 4)) |>
  mutate(
    #month = ifelse(quarter == 1, 2, 11), # most common months
    month = 11, # most common months
    month_year = paste(month, year, sep = "_")
  )

Add ICES areas

# https://stackoverflow.com/questions/34272309/extract-shapefile-value-to-point-with-r
# https://gis.ices.dk/sf/
shape <- shapefile(paste0(home, "/data/shapefiles/ICES-StatRec-mapto-ICES-Areas/StatRec_map_Areas_Full_20170124.shp"))
head(shape)
  OBJECTID ID ICESNAME SOUTH WEST NORTH EAST AREA_KM2 stat_x stat_y Area_27
1        1 47     47A0  59.0  -44  59.5  -43     3178  -43.5  59.25  14.b.2
2        2 48     48A0  59.5  -44  60.0  -43     3132  -43.5  59.75  14.b.2
3        3 49     49A0  60.0  -44  60.5  -43     3085  -43.5  60.25  14.b.2
4        4 50     50A0  60.5  -44  61.0  -43     3038  -43.5  60.75  14.b.2
5        5 51     51A0  61.0  -44  61.5  -43     2991  -43.5  61.25  14.b.2
6        6 52     52A0  61.5  -44  62.0  -43     2944  -43.5  61.75  14.b.2
          Perc      MaxPer RNDMaxPer AreasList Shape_Leng Shape_Area
1 100.00000000 100.0000000       100    14.b.2          3        0.5
2  84.12674145  84.1267414        84    14.b.2          3        0.5
3  24.99803694  24.9980369        25    14.b.2          3        0.5
4  11.97744244  11.9774424        12    14.b.2          3        0.5
5   3.89717625   3.8971762         4    14.b.2          3        0.5
6   0.28578428   0.2857843         0    14.b.2          3        0.5
pts <- SpatialPoints(cbind(dat$lon, dat$lat),
  proj4string = CRS(proj4string(shape))
)

dat$subdiv <- over(pts, shape)$Area_27

# Rename subdivisions to the more common names and do some more filtering (by sub div and area)
sort(unique(dat$subdiv))
[1] "3.d.24"   "3.d.25"   "3.d.26"   "3.d.27"   "3.d.28.2"
dat <- dat |>
  mutate(
    sub_div = factor(subdiv),
    sub_div = fct_recode(subdiv,
      "24" = "3.d.24",
      "25" = "3.d.25",
      "26" = "3.d.26",
      "27" = "3.d.27",
      "28" = "3.d.28.1",
      "28" = "3.d.28.2",
      "29" = "3.d.29"
    ),
    sub_div = as.character(sub_div)
  ) |>
  filter(sub_div %in% c("24", "25", "26", "27", "28", 2)) |>
  filter(lat > 54 & lat < 59 & lon < 22)
Warning: There was 1 warning in `.fun()`.
ℹ In argument: `sub_div = fct_recode(...)`.
Caused by warning:
! Unknown levels in `f`: 3.d.28.1, 3.d.29
# Add ICES rectangles
dat$ices_rect <- mapplots::ices.rect2(lon = dat$lon, lat = dat$lat)

dat <- dat |> dplyr::select(-subdiv)

Add Saduria

saduria <- terra::rast(paste0(home, "/data/covariates/saduria/FWBiomassm_raster_19812019presHighweightcor_no0_newZi.tif"))

WGS84 <- "+proj=longlat +datum=WGS84"

saduria_latlon <- terra::project(saduria, WGS84)

density_saduria <- terra::extract(saduria_latlon, dat |> dplyr::select(lon, lat))

dat$saduria <- density_saduria$FWBiomassm_raster_19812019presHighweightcor_no0_newZi

Add pelagic

# Read data on rectangle level
spr <- read_xlsx(paste0(home, "/data/covariates/pelagic/N and B per Rect. 1991-2023.xlsx"),
  sheet = 4
) |>
  rename(
    ices_rect = RECT,
    year = ANNUS
  ) |>
  mutate_at(vars(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`), ~ replace_na(., 0)) |> # I need to replace NA with 0, else I can't sum! According to Olavi who sent the data, NA means 0 and nothing else. Rectangle*year combinations that do not have information about biomass are simply not included in this data
  mutate(
    ices_rect = as.factor(ices_rect),
    Species = "Sprat",
    biomass_spr = `1` + `2` + `3` + `4` + `5` + `6` + `7` + `8`,
    id_r = paste(ices_rect, year, sep = "_")
  ) |>
  filter(year >= 1993) |>
  dplyr::select(year, ices_rect, biomass_spr) |>
  summarise(biomass_spr = mean(biomass_spr), .by = c(ices_rect, year))

her <- read_xlsx(paste0(home, "/data/covariates/pelagic/N and B per Rect. 1991-2023.xlsx"),
  sheet = 3
) |>
  rename(
    ices_rect = RECT,
    year = ANNUS
  ) |>
  mutate_at(vars(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`), ~ replace_na(., 0)) |> # I need to replace NA with 0, else I can't sum! According to Olavi who sent the data, NA means 0 and nothing else. Rectangle*year combinations that do not have information about biomass are simply not included in this data
  mutate(
    ices_rect = as.factor(ices_rect),
    Species = "Sprat",
    biomass_her = `1` + `2` + `3` + `4` + `5` + `6` + `7` + `8`,
    id_r = paste(ices_rect, year, sep = "_")
  ) |>
  filter(year >= 1993) |>
  dplyr::select(year, ices_rect, biomass_her) |>
  summarise(biomass_her = mean(biomass_her), .by = c(ices_rect, year))

pelagics <- left_join(spr, her, by = c("year", "ices_rect"))

dat <- dat |> tidylog::left_join(pelagics, by = c("year", "ices_rect"))

# If there are no data in a specific rectangle, use the sub-division mean. If no values in the sub div, use annual mean
dat <- dat |>
  mutate(
    biomass_spr_sd_mean = mean(biomass_spr, na.rm = TRUE),
    biomass_her_sd_mean = mean(biomass_her, na.rm = TRUE),
    .by = c(sub_div, year)
  ) |>
  mutate(
    biomass_spr = ifelse(is.na(biomass_spr), biomass_spr_sd_mean, biomass_spr),
    biomass_her = ifelse(is.na(biomass_her), biomass_her_sd_mean, biomass_her)
  ) |>
  mutate(
    biomass_spr_yr_mean = mean(biomass_spr, na.rm = TRUE),
    biomass_her_yr_mean = mean(biomass_her, na.rm = TRUE),
    .by = c(year)
  ) |>
  mutate(
    biomass_spr = ifelse(is.na(biomass_spr), biomass_spr_yr_mean, biomass_spr),
    biomass_her = ifelse(is.na(biomass_her), biomass_her_yr_mean, biomass_her)
  )

Add environmental covariates

Oxygen

# Source:
# Source:
# https://data.marine.copernicus.eu/product/BALTICSEA_MULTIYEAR_BGC_003_012/download?dataset=cmems_mod_bal_bgc_my_P1M-m_202303
# Print details
print(nc_open("/Users/maxlindmark/Dropbox/Max work/R/spatial-metabolic-index/data/covariates/cmems_mod_bal_bgc_my_P1M-m_1742291088556.nc"))
File /Users/maxlindmark/Dropbox/Max work/R/spatial-metabolic-index/data/covariates/cmems_mod_bal_bgc_my_P1M-m_1742291088556.nc (NC_FORMAT_NETCDF4):

     1 variables (excluding dimension variables):
        float o2b[longitude,latitude,time]   (Contiguous storage)  
            units: mmol m-3
            _FillValue: NaN
            standard_name: mole_concentration_of_dissolved_molecular_oxygen_in_sea_water
            long_name: Dissolved Oxygen Concentration
            cell_methods: time: mean
            interval_operation: 90 s
            interval_write: 1 d
            missing_value: -999
            online_operation: average
            unit_long: millimole O2 per unit volume

     3 dimensions:
        time  Size:372 
            standard_name: time
            long_name: Time
            units: seconds since 1970-01-01 00:00:00
            calendar: gregorian
            axis: T
        latitude  Size:300 
            standard_name: latitude
            long_name: Latitude
            units: degrees_north
            unit_long: Degrees North
            axis: Y
        longitude  Size:342 
            standard_name: longitude
            long_name: Longitude
            units: degrees_east
            unit_long: Degrees East
            axis: X

    12 global attributes:
        Conventions: CF-1.11
        title: CMEMS ERGOM monthly integrated model fields
        institution: Baltic MFC, PU Danish Meteorological Institute
        source: CMEMS BAL MFC NEMO model output converted to NetCDF
        history: Fri Dec 16 10:51:30 2022: cdo -z zip_1 --sortname copy /net/isilon/ifs/arch/home/iri/BALMFC/Nemo4.0/BALMFCMYP2022_univariateSST//BAL-ERGOM_BGC-DailyMeans-19930101.nc tmp_19930101.nc
        contact: servicedesk.cmems@mercator-ocean.eu
        references: https://marine.copernicus.eu/
        comment: Data on cropped native product grid. Horizontal velocities destagged
        subset:source: ARCO data downloaded from the Marine Data Store using the MyOcean Data Portal
        subset:productId: BALTICSEA_MULTIYEAR_BGC_003_012
        subset:datasetId: cmems_mod_bal_bgc_my_P1M-m_202303
        subset:date: 2025-03-18T09:44:48.557Z
oxy_tibble <- tidync("/Users/maxlindmark/Dropbox/Max work/R/spatial-metabolic-index/data/covariates/cmems_mod_bal_bgc_my_P1M-m_1742291088556.nc") |>
  hyper_tibble() |>
  mutate(date = as_datetime(time, origin = "1970-01-01")) |>
  mutate(
    month = month(date),
    day = day(date),
    year = year(date),
    month_year = paste(month, year, sep = "_"),
    oxy = o2b * 0.0223909
  )

# Loop through all year combos, extract the temperatures at the data locations
oxy_list <- list()

for (i in unique(dat$month_year)) {
  d_sub <- filter(dat, month_year == i)
  oxy_tibble_sub <- filter(oxy_tibble, month_year == i)

  # Convert to raster
  ggplot(oxy_tibble_sub, aes(longitude, latitude)) +
    geom_point(size = 0.1)

  oxy_raster <- as_spatraster(oxy_tibble_sub,
    xycols = 2:3,
    crs = "WGS84", digits = 3
  )

  ggplot() +
    geom_spatraster(data = oxy_raster$oxy, aes(fill = oxy)) +
    scale_fill_viridis(option = "magma") +
    ggtitle(i)

  # Extract from raster
  d_sub$oxy <- terra::extract(
    oxy_raster$oxy,
    d_sub |> dplyr::select(lon, lat)
  )$oxy

  # Save
  oxy_list[[i]] <- d_sub
}

d_oxy <- bind_rows(oxy_list)

Temperature & Salinity

# Source
# Salinity and temperature
# https://data.marine.copernicus.eu/product/BALTICSEA_MULTIYEAR_PHY_003_011/download?dataset=cmems_mod_bal_phy_my_P1M-m_202303
# Print details
print(nc_open("/Users/maxlindmark/Dropbox/Max work/R/spatial-metabolic-index/data/covariates/cmems_mod_bal_phy_my_P1M-m_1742290961273.nc"))
File /Users/maxlindmark/Dropbox/Max work/R/spatial-metabolic-index/data/covariates/cmems_mod_bal_phy_my_P1M-m_1742290961273.nc (NC_FORMAT_NETCDF4):

     2 variables (excluding dimension variables):
        float bottomT[longitude,latitude,time]   (Contiguous storage)  
            units: degrees_C
            _FillValue: NaN
            standard_name: sea_water_potential_temperature_at_sea_floor
            long_name: Sea floor potential temperature
            cell_methods: time: mean
            interval_operation: 90 s
            interval_write: 1 d
            missing_value: -999
            online_operation: average
            unit_long: degrees Celsius
        float sob[longitude,latitude,time]   (Contiguous storage)  
            units: 0.001
            _FillValue: NaN
            standard_name: sea_water_salinity_at_sea_floor
            long_name: Sea water salinity at sea floor
            cell_methods: time: mean
            interval_operation: 90 s
            interval_write: 1 d
            missing_value: -999
            online_operation: average
            unit_long: 0.001

     3 dimensions:
        time  Size:372 
            standard_name: time
            long_name: Time
            units: seconds since 1970-01-01 00:00:00
            calendar: gregorian
            axis: T
        latitude  Size:300 
            standard_name: latitude
            long_name: Latitude
            units: degrees_north
            unit_long: Degrees North
            axis: Y
        longitude  Size:342 
            standard_name: longitude
            long_name: Longitude
            units: degrees_east
            unit_long: Degrees East
            axis: X

    11 global attributes:
        Conventions: CF-1.11
        title: CMEMS NEMO monthly integrated model fields
        institution: Baltic MFC, PU Danish Meteorological Institute
        source: CMEMS BAL MFC NEMO model output converted to NetCDF
        contact: servicedesk.cmems@mercator-ocean.eu
        references: https://marine.copernicus.eu/
        comment: Data on cropped native product grid. Horizontal velocities destagged
        subset:source: ARCO data downloaded from the Marine Data Store using the MyOcean Data Portal
        subset:productId: BALTICSEA_MULTIYEAR_PHY_003_011
        subset:datasetId: cmems_mod_bal_phy_my_P1M-m_202303
        subset:date: 2025-03-18T09:42:41.273Z
st_tibble <- tidync("/Users/maxlindmark/Dropbox/Max work/R/spatial-metabolic-index/data/covariates/cmems_mod_bal_phy_my_P1M-m_1742290961273.nc") |>
  hyper_tibble() |>
  mutate(date = as_datetime(time, origin = "1970-01-01")) |>
  mutate(
    month = month(date),
    day = day(date),
    year = year(date),
    month_year = paste(month, year, sep = "_")
  )

# Loop through all year combos, extract the temperatures at the data locations
st_list <- list()

for (i in unique(dat$month_year)) {
  d_sub <- filter(dat, month_year == i)
  st_tibble_sub <- filter(st_tibble, month_year == i)

  # Convert to raster
  ggplot(st_tibble_sub, aes(longitude, latitude)) +
    geom_point(size = 0.1)

  st_raster <- as_spatraster(st_tibble_sub,
    xycols = 3:4,
    crs = "WGS84", digits = 3
  )

  ggplot() +
    geom_spatraster(data = st_raster$bottomT, aes(fill = bottomT)) +
    scale_fill_viridis(option = "magma") +
    ggtitle(i)

  ggplot() +
    geom_spatraster(data = st_raster$sob, aes(fill = sob)) +
    scale_fill_viridis(option = "magma") +
    ggtitle(i)

  # Extract from raster
  d_sub$temp <- terra::extract(
    st_raster$bottomT,
    d_sub |> dplyr::select(lon, lat)
  )$bottomT

  d_sub$salinity <- terra::extract(
    st_raster$sob,
    d_sub |> dplyr::select(lon, lat)
  )$sob

  # Save
  st_list[[i]] <- d_sub
}

d_st <- bind_rows(st_list)
env_dat <- d_st |>
  left_join(d_oxy |> dplyr::select(-X, -Y, -depth, -year, -month),
    by = c("month_year", "lon", "lat")
  ) |>
  dplyr::select(month_year, lon, lat, temp, salinity, oxy)
# Now join these data with the full_dat
dat_full <- left_join(dat, env_dat, by = c("month_year", "lon", "lat")) |>
  drop_na(temp, salinity, oxy)

Save

pred_grid_93_08 <- dat_full |> filter(year <= 2008)
pred_grid_09_23 <- dat_full |> filter(year >= 2009)

write_csv(pred_grid_93_08, file = paste0(home, "/data/clean/pred_grid_(1_2).csv"))
write_csv(pred_grid_09_23, file = paste0(home, "/data/clean/pred_grid_(2_2).csv"))